library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Locations of the input CSVs and of the directory the final plots are saved to.
path_to_csvs <- "~/Google_Drive/Simplification-for-generalization/simplification-data"
path_to_plots <- "~/Google_Drive/Simplification-for-generalization/final-plots/"

# Get the file names of all CSVs. The pattern is anchored with `$` so that only
# names that END in ".csv" are picked up (an unanchored "\\.csv" would also
# match e.g. "runs.csv.bak").
all_csvs <- list.files(path = path_to_csvs, pattern = "\\.csv$")

# Read every CSV, trim whitespace from its column names and tag each row with
# the problem name recovered from the file name. The per-file data frames are
# collected in a list and bound once, instead of growing a data frame (and
# assigning with `<<-`) on every iteration.
run_dfs <- lapply(all_csvs, function(filename) {
  run_df <- read.csv(file.path(path_to_csvs, filename),
                     stringsAsFactors = FALSE)
  colnames(run_df) <- str_trim(colnames(run_df))

  # Split the file name on '.' and '-' and keep pieces 3 .. (n - 1).
  # The original expression `s[4:length(s)-1]` parses as `(4:length(s)) - 1`
  # because `:` binds tighter than binary `-`; the same index range is written
  # out explicitly here so the result is unchanged.
  pieces <- unlist(str_split(filename, "[.-]"))
  pieces <- pieces[3:(length(pieces) - 1)]
  run_df$problem <- str_c(pieces, collapse = "-")

  run_df
})

# Single data frame holding the rows of every run file.
all_runs <- bind_rows(run_dfs)
# Palette used by the manual fill/colour scales further down.
# Earlier ordering, kept for reference:
# cbbPalette <- c("#E69F00", "#56B4E9", "#000000", "#009E73", "#D55E00", "#CC79A7", "#F0E442", "#0072B2")
cbbPalette <- c(
  "#009E73", "#E69F00", "#56B4E9", "#F0E442",
  "#000000", "#D55E00", "#0072B2"
)

# Distinct (log, problem) pairs whose unsimplified program has a non-zero
# test error.
unsimplified_that_dont_generalize <- all_runs %>%
  filter(type == "unsimplified", testError > 0) %>%
  distinct(log, problem)

# Distinct (log, problem) pairs whose unsimplified program has zero test error.
unsimplified_that_generalize <- all_runs %>%
  filter(type == "unsimplified", testError == 0) %>%
  distinct(log, problem)

# Both sets stacked into one lookup table, each row labelled with a
# human-readable description of its pre-simplification status.
started_generalized_lookup <- bind_rows(
  unsimplified_that_generalize %>%
    mutate(started_generalized = "Generalized Pre-Simplification"),
  unsimplified_that_dont_generalize %>%
    mutate(started_generalized = "Didn't Generalize Pre-Simplification")
)
Nic's plot
# One row per unsimplified run: its size and whether it had zero test error.
unsimplified_info <- all_runs %>%
  filter(type == "unsimplified") %>%
  transmute(problem, log, method,
            originalSize = programSize,
            genPre = testError == 0)

# Scatter of simplified size against original size. Rows are the pre/post
# generalization combination ("<genPre>_<genPost>"), columns are the
# simplification method. The grey line is y = x; the dashed black line is a
# linear fit.
all_runs %>%
  filter(type == "simplified") %>%
  transmute(problem, log, method,
            genPost = testError == 0,
            simplifiedSize = programSize) %>%
  inner_join(unsimplified_info, by = c("problem", "log", "method")) %>%
  unite(quad, genPre, genPost, sep = "_") %>%
  select(problem, method, originalSize, simplifiedSize, quad) %>%
  ggplot(mapping = aes(originalSize, simplifiedSize)) +
  geom_point(aes(colour = problem), shape = 4, size = 0.3, alpha = 0.3) +
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  geom_smooth(method = "lm", se = FALSE,
              linetype = 2, size = 0.5, colour = "black") +
  facet_grid(quad ~ method) +
  theme_bw() +
  # Draw legend keys fully opaque even though the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

Nic's plot rotated
# One row per unsimplified run: its size and whether it had zero test error.
unsimplified_info <- all_runs %>%
  filter(type == "unsimplified") %>%
  transmute(problem, log, method,
            originalSize = programSize,
            genPre = testError == 0)

# Same scatter as the previous plot, but faceted by problem (columns) instead
# of by method, with points coloured by method and free axis scales per panel.
all_runs %>%
  filter(type == "simplified") %>%
  transmute(problem, log, method,
            genPost = testError == 0,
            simplifiedSize = programSize) %>%
  inner_join(unsimplified_info, by = c("problem", "log", "method")) %>%
  unite(quad, genPre, genPost, sep = "_") %>%
  select(problem, method, originalSize, simplifiedSize, quad) %>%
  ggplot(mapping = aes(originalSize, simplifiedSize)) +
  geom_point(aes(colour = method), shape = 4, size = 0.5, alpha = 0.3) +
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  geom_smooth(method = "lm", se = FALSE,
              linetype = 2, size = 0.5, colour = "black") +
  facet_grid(quad ~ problem, scales = "free") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 15, hjust = 1)) +
  # Draw legend keys fully opaque even though the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

Quad Scatter Plot (Nic’s Plot)
# One row per unsimplified run: its size and a logical flag for zero test
# error before simplification.
unsimplified_info <- all_runs %>%
  filter(type == "unsimplified") %>%
  transmute(problem, log, method,
            originalSize = programSize,
            started_generalized = testError == 0)

# 2x2 scatter of post- vs pre-simplification size. Rows are the logical
# `started_generalized` flag, columns are the descriptive post-simplification
# label. The grey line is y = x; the dashed black line is a linear fit.
all_runs %>%
  filter(type == "simplified") %>%
  mutate(simplifiedSize = programSize,
         post_simp_generalized = ifelse(testError == 0,
                                        "Generalized Post-Simplification",
                                        "Didn't Generalize Post-Simplification")) %>%
  inner_join(unsimplified_info, by = c("problem", "log", "method")) %>%
  select(problem, originalSize, simplifiedSize,
         started_generalized, post_simp_generalized) %>%
  ggplot(mapping = aes(originalSize, simplifiedSize)) +
  geom_point(aes(colour = problem), shape = 4, size = 0.5, alpha = 0.3) +
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  geom_smooth(method = "lm", se = FALSE,
              linetype = 2, size = 0.5, colour = "black") +
  facet_grid(started_generalized ~ post_simp_generalized, scales = "free") +
  theme_bw() +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12),
        strip.text = element_text(size = 12),
        legend.position = "none") +
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  xlab("Pre-Simplification Size") +
  ylab("Post-Simplification Size")

# No plot is passed, so ggsave() writes the most recent plot.
ggsave(filename = paste0(path_to_plots, "Quad_Scatter.pdf"),
       width = 7.5, height = 4.5, units = "in")
Problem vs Average Program Size by Method
Table
# Mean simplified program size (rounded to 3 decimals), one row per problem
# and one column per simplification method.
simp_runs <- all_runs %>%
  filter(type == "simplified") %>%
  group_by(method, problem) %>%
  summarise(avgProgramSize = round(mean(programSize), digits = 3)) %>%
  spread(key = method, value = avgProgramSize)

# Mean unsimplified program size per problem goes in column `none`, followed
# by the per-method columns from `simp_runs`.
prog_size_table <- inner_join(
  all_runs %>%
    filter(type == "unsimplified") %>%
    group_by(problem) %>%
    summarise(none = round(mean(programSize), digits = 3)),
  simp_runs,
  by = "problem"
)

# Written relative to the current working directory.
write.csv(prog_size_table, file = "prog_size_table.csv")
Plot
# Long form of the size table: one row per (problem, method) with the mean
# size in `size`. Melted once and reused for both the reference marks and the
# bars (the original melted the same table twice).
size_long <- melt(prog_size_table,
                  id.vars = c("problem"),
                  measure.vars = c("none", "Genome", "GenomeBacktracking", "GenomeBacktrackingNoop", "GenomeNoop", "Program"),
                  value.name = "size")

# Unsimplified ("none") mean size per problem, drawn as a grey background
# rectangle topped by a thick horizontal line behind each group of bars.
horiz_lines_df <- size_long %>%
  filter(variable == 'none')

# Horizontal extent of each problem's reference mark: i - 0.45 .. i + 0.45 for
# the i-th row. The sequences are sized from the data instead of hard-coding
# 24 problems, so the plot no longer errors when the number of problems
# changes. Assumes the row order here matches the order in which ggplot places
# the problems on the x axis -- TODO confirm if problem names are ever
# re-levelled.
n_problems <- nrow(horiz_lines_df)
horiz_lines_df$x1 <- seq(0.55, by = 1, length.out = n_problems)
horiz_lines_df$x2 <- seq(1.45, by = 1, length.out = n_problems)

size_long %>%
  filter(variable != 'none') %>%
  ggplot(aes(x = problem,
             y = size,
             fill = variable)) +
  # NOTE(review): the bars are added twice. The first layer is presumably
  # there so the discrete x scale is set up before the rect/segment layers
  # (which use numeric x positions) are added; the second layer redraws the
  # bars on top of the grey rectangles. Confirm before removing either.
  geom_bar(stat = "identity",
           position = "dodge") +
  geom_rect(data = horiz_lines_df,
            aes(xmin = x1,
                xmax = x2,
                ymin = 0,
                ymax = size),
            fill = '#E5E5E5') +
  geom_segment(data = horiz_lines_df,
               aes(x = x1,
                   y = size,
                   xend = x2,
                   yend = size),
               size = 1.5) +
  geom_bar(stat = "identity",
           position = "dodge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 25, hjust = 1, size = 8),
        legend.position = "top") +
  labs(x = "",
       y = "Program Size") +
  # NOTE(review): six labels are supplied although 'none' is filtered out of
  # the bar data; presumably the fill scale still sees 'none' through the
  # layers built on horiz_lines_df. Verify the legend order before editing.
  scale_fill_manual(values = cbbPalette,
                    name = 'Method',
                    labels = c("Genome", "GenomeBacktracking", "GenomeBacktrackingNoop", "GenomeNoop", "Unsimplified", "Program"))

# No plot is passed, so ggsave() writes the most recent plot.
ggsave(paste0(path_to_plots, 'Problem_by_Size_bar.pdf'),
       units = c('in'), width = 7.5, height = 5)
Problem Vs Percent Generalized by Method
Table
# Fraction of simplified runs with zero test error, one row per problem and
# one column per simplification method. Values are proportions in [0, 1].
simp_runs <- all_runs %>%
  filter(type == "simplified") %>%
  group_by(method, problem) %>%
  summarise(percentGeneralized = mean(testError == 0)) %>%
  spread(key = method, value = percentGeneralized)

# Fraction of unsimplified runs with zero test error per problem goes in
# column `none`, followed by the per-method columns from `simp_runs`.
perct_generalized_table <- inner_join(
  all_runs %>%
    filter(type == "unsimplified") %>%
    group_by(problem) %>%
    summarise(none = mean(testError == 0)),
  simp_runs,
  by = "problem"
)

# Written relative to the current working directory.
write.csv(perct_generalized_table, file = "perct_generalized_table.csv")
Plot
# Long form of the generalization table: one row per (problem, method) with
# the generalized fraction in `pct_gener`. Melted once and reused for both the
# reference marks and the bars (the original melted the same table twice).
pct_gener_long <- melt(perct_generalized_table,
                       id.vars = c("problem"),
                       measure.vars = c("none", "Genome", "GenomeBacktracking", "GenomeBacktrackingNoop", "GenomeNoop", "Program"),
                       value.name = "pct_gener")

# Unsimplified ("none") generalized fraction per problem, drawn as a grey
# background rectangle plus a thick horizontal line for each group of bars.
horiz_lines_df <- pct_gener_long %>%
  filter(variable == 'none')

# Horizontal extent of each problem's reference mark: i - 0.45 .. i + 0.45 for
# the i-th row. The sequences are sized from the data instead of hard-coding
# 24 problems, so the plot no longer errors when the number of problems
# changes. Assumes the row order here matches the order in which ggplot places
# the problems on the x axis -- TODO confirm if problem names are ever
# re-levelled.
n_problems <- nrow(horiz_lines_df)
horiz_lines_df$x1 <- seq(0.55, by = 1, length.out = n_problems)
horiz_lines_df$x2 <- seq(1.45, by = 1, length.out = n_problems)

pct_gener_long %>%
  filter(variable != 'none') %>%
  ggplot(aes(x = problem,
             y = pct_gener,
             fill = variable)) +
  # NOTE(review): the bars are added twice. The first layer is presumably
  # there so the discrete x scale is set up before the rect/segment layers
  # (which use numeric x positions) are added; the second layer redraws the
  # bars on top of the grey rectangles. Confirm before removing either.
  geom_bar(stat = "identity",
           position = "dodge") +
  geom_rect(data = horiz_lines_df,
            aes(xmin = x1,
                xmax = x2,
                ymin = 0,
                ymax = pct_gener),
            fill = '#E5E5E5') +
  geom_bar(stat = "identity",
           position = "dodge") +
  # Unlike the size plot, the reference line is drawn after (on top of) the
  # bars here.
  geom_segment(data = horiz_lines_df,
               aes(x = x1,
                   y = pct_gener,
                   xend = x2,
                   yend = pct_gener),
               size = 1.5) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 25, hjust = 1, size = 8),
        legend.position = "top") +
  # The plotted values are proportions in [0, 1] (means of a 0/1 indicator).
  labs(x = "",
       y = "% Generalized") +
  # NOTE(review): six labels are supplied although 'none' is filtered out of
  # the bar data; presumably the fill scale still sees 'none' through the
  # layers built on horiz_lines_df. Verify the legend order before editing.
  scale_fill_manual(values = cbbPalette,
                    name = 'Method',
                    labels = c("Genome", "GenomeBacktracking", "GenomeBacktrackingNoop", "GenomeNoop", "Unsimplified", "Program"))

# No plot is passed, so ggsave() writes the most recent plot.
ggsave(paste0(path_to_plots, 'Problem_by_PcntGen_bar.pdf'),
       units = c('in'), width = 7.5, height = 5)
Simplified Size vs Percent Generalized
Plot a
# For runs whose unsimplified program had zero test error: fraction of
# simplified programs with zero test error at each (simplified size, problem)
# combination. `unsimplified_that_generalize` holds only distinct key columns,
# so a filtering join keeps exactly the rows the inner join kept.
all_runs %>%
  filter(type == "simplified") %>%
  semi_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  group_by(programSize, problem) %>%
  summarise(percentGeneralized = mean(testError == 0)) %>%
  ggplot(mapping = aes(programSize, percentGeneralized)) +
  geom_point(alpha = 0.5, size = 1) +
  # facet_wrap(~ problem) +
  theme_bw() +
  ggtitle("Unsimplified Programs that Generalized") +
  xlab("Post Simplification Program Size") +
  ylab("% Generalized")

Plot b
# For runs whose unsimplified program had non-zero test error: fraction of
# simplified programs with zero test error at each (simplified size, problem)
# combination. The plotted values are proportions in [0, 1].
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = ifelse(testError == 0, 1, 0)) %>%
  group_by(programSize, problem) %>%
  summarise(percentGeneralized = mean(generalized)) %>%
  ggplot(aes(x = programSize,
             y = percentGeneralized)) +
  geom_point(size = 1,
             alpha = 0.5) +
  #facet_wrap(~ problem) +
  theme_bw() +
  # Title grammar fixed: was "Didn't Generalized".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size",
       y = "% Generalized")

Density of simplified size
Not faceted
# Density of simplified program size over all simplified runs, split by
# whether the simplified program has zero test error.
all_runs %>%
  filter(type == "simplified") %>%
  mutate(generalized = (testError == 0)) %>%
  ggplot(mapping = aes(programSize,
                       colour = generalized,
                       fill = generalized)) +
  geom_density(alpha = 0.5) +
  # facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  xlab("Post Simplification Program Size")

Facet where unsimplified program generalized
# Per-problem density of simplified program size, restricted to runs whose
# unsimplified program had zero test error. `unsimplified_that_generalize`
# holds only distinct key columns, so a filtering join keeps exactly the rows
# the inner join kept.
all_runs %>%
  filter(type == "simplified") %>%
  semi_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  mutate(generalized = (testError == 0)) %>%
  ggplot(mapping = aes(programSize,
                       colour = generalized,
                       fill = generalized)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  ggtitle("Unsimplified Programs that Generalized") +
  xlab("Post Simplification Program Size")

Facet where unsimplified program DIDN’T generalize
# Per-problem density of simplified program size, restricted to runs whose
# unsimplified program had non-zero test error, split by whether the
# simplified program has zero test error.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0) %>%
  ggplot(aes(x = programSize,
             fill = generalized,
             color = generalized)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  # Title grammar fixed: was "Didn't Generalized".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size")

Density (stat = “count”) of simplified size
Not faceted
# Count of simplified runs at each program size (log10 y axis), split by
# whether the simplified program has zero test error.
all_runs %>%
  filter(type == "simplified") %>%
  mutate(generalized = (testError == 0)) %>%
  ggplot(mapping = aes(programSize,
                       colour = generalized,
                       fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  theme_bw() +
  xlab("Post Simplification Program Size") +
  scale_y_log10()

Not faceted A
# Count of simplified runs at each program size, restricted to runs whose
# unsimplified program had zero test error. `unsimplified_that_generalize`
# holds only distinct key columns, so a filtering join keeps exactly the rows
# the inner join kept.
all_runs %>%
  filter(type == "simplified") %>%
  semi_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  mutate(generalized = (testError == 0)) %>%
  ggplot(mapping = aes(programSize,
                       colour = generalized,
                       fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  theme_bw() +
  ggtitle("Unsimplified Programs that Generalized") +
  xlab("Post Simplification Program Size")

Not faceted B
# Count of simplified runs at each program size, restricted to runs whose
# unsimplified program had non-zero test error, split by whether the
# simplified program has zero test error.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0) %>%
  ggplot(aes(x = programSize,
             fill = generalized,
             color = generalized)) +
  geom_density(alpha = 0.5,
               stat = "count") +
  theme_bw() +
  # Title grammar fixed: was "Didn't Generalized".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size")

2x2 Density Plot
# 2x2 grid of simplified-size counts. Rows are the pre-simplification status
# from the lookup table, columns are the post-simplification status. The y
# axis is free per row.
all_runs %>%
  filter(type == "simplified") %>%
  mutate(generalized = ifelse(testError == 0,
                              "Generalized Post-Simplification",
                              "Didn't Generalize Post-Simplification")) %>%
  inner_join(started_generalized_lookup, by = c("log", "problem")) %>%
  ggplot(mapping = aes(programSize)) +
  geom_density(stat = "count",
               colour = cbbPalette[6],
               fill = cbbPalette[6]) +
  facet_grid(started_generalized ~ generalized, scales = "free_y") +
  # facet_grid( generalized ~ started_generalized) +
  theme_bw() +
  theme(axis.text = element_text(size = 18),
        axis.title = element_text(size = 18),
        strip.text = element_text(size = 16)) +
  xlab("Post Simplification Program Size") +
  ylab("Count")

# No plot is passed, so ggsave() writes the most recent plot.
ggsave(filename = paste0(path_to_plots, "Quad_Density.pdf"),
       width = 7.5, height = 4.5, units = "in")
# Number of simplified runs in each cell of the 2x2 (pre-simplification status
# x post-simplification status) grid.
all_runs %>%
  filter(type == "simplified") %>%
  mutate(post_simp_generalized = ifelse(testError == 0,
                                        "Generalized Post-Simplification",
                                        "Didn't Generalize Post-Simplification")) %>%
  inner_join(started_generalized_lookup, by = c("log", "problem")) %>%
  group_by(started_generalized, post_simp_generalized) %>%
  summarise(count = n())
## Source: local data frame [4 x 3]
## Groups: started_generalized [?]
##
## started_generalized
## <chr>
## 1 Didn't Generalize Pre-Simplification
## 2 Didn't Generalize Pre-Simplification
## 3 Generalized Pre-Simplification
## 4 Generalized Pre-Simplification
## # ... with 2 more variables: post_simp_generalized <chr>, count <int>
Facet where unsimplified program generalized
# Per-problem count of simplified runs at each program size, restricted to
# runs whose unsimplified program had zero test error.
# `unsimplified_that_generalize` holds only distinct key columns, so a
# filtering join keeps exactly the rows the inner join kept.
all_runs %>%
  filter(type == "simplified") %>%
  semi_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  mutate(generalized = (testError == 0)) %>%
  ggplot(mapping = aes(programSize,
                       colour = generalized,
                       fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  ggtitle("Unsimplified Programs that Generalized") +
  xlab("Post Simplification Program Size")

Facet where unsimplified program DIDN’T generalize
# Per-problem count of simplified runs at each program size, restricted to
# runs whose unsimplified program had non-zero test error, split by whether
# the simplified program has zero test error.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0) %>%
  ggplot(aes(x = programSize,
             fill = generalized,
             color = generalized)) +
  geom_density(alpha = 0.5,
               stat = "count") +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  # Title grammar fixed: was "Didn't Generalized".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size")

# Count of simplified runs at each program size in a problem (rows) by
# pre-simplification status (columns) grid, split by whether the simplified
# program has zero test error. Fill and outline share the same two palette
# entries and the same legend title.
all_runs %>%
  filter(type == "simplified") %>%
  inner_join(started_generalized_lookup, by = c("log", "problem")) %>%
  mutate(generalized = (testError == 0)) %>%
  ggplot(mapping = aes(programSize,
                       colour = generalized,
                       fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  facet_grid(problem ~ started_generalized, scales = "free_y") +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_fill_manual(name = "Generalized Post-Simplification",
                    values = cbbPalette[c(6, 3)]) +
  scale_color_manual(name = "Generalized Post-Simplification",
                     values = cbbPalette[c(6, 3)]) +
  xlab("Post Simplification Program Size")
